About: these statistics are compiled from Sidewalks, Crossings and Kerbs data.
All the code is kept here so that anyone can reproduce the results; the original Jupyter notebook is available in the repository!
Scroll down and the charts will begin to appear. They were made with the amazing Altair library, which enables interactivity.
Currently it is only optimized for desktop.
from datetime import datetime

# Record when this notebook run happened and show it to the reader.
now = datetime.now()
# Day/month/year followed by a 24-hour clock time.
dt_string = format(now, "%d/%m/%Y %H:%M:%S")
print('Last Update: ', dt_string)
Last Update: 29/06/2022 10:06:12
import geopandas as gpd
import pandas as pd
import altair as alt
def get_count_df(input_df, fieldname, str_to_append=' type'):
    """Count how often each distinct value of a column occurs.

    Parameters
    ----------
    input_df : DataFrame-like
        Table holding the column to summarise.
    fieldname : str
        Name of the column whose values are counted.
    str_to_append : str
        Suffix added to ``fieldname`` to name the value column of the result.

    Returns
    -------
    tuple
        ``(counts, outfieldname)`` where ``counts`` is a DataFrame with the
        columns ``outfieldname`` (distinct values) and ``'count'``
        (occurrences), ordered by descending count, and ``outfieldname`` is
        ``fieldname + str_to_append``.
    """
    outfieldname = fieldname + str_to_append
    # Name the value axis and the counts explicitly rather than renaming the
    # default columns created by ``reset_index()``: those default names differ
    # between pandas releases, which made the previous rename produce two
    # 'count' columns on newer versions.
    counts = (
        input_df[fieldname]
        .value_counts()
        .rename_axis(outfieldname)
        .reset_index(name='count')
        .sort_values(by='count', ascending=False)
    )
    return counts, outfieldname
def create_barchart(input_df, fieldname, title, str_to_append=' type', title_fontsize=24, tooltip='count'):
    """Draw an interactive bar chart of the value counts of one column.

    Parameters
    ----------
    input_df : DataFrame-like
        Table holding the column to chart.
    fieldname : str
        Column whose distinct values become the bars.
    title : str
        Chart title.
    str_to_append : str
        Suffix appended to ``fieldname`` to label the x axis.
    title_fontsize : int
        Font size applied to the chart title.
    tooltip : str or sequence of str
        Column(s) of the counted table to show in the tooltip. The formatted
        percentage is always shown as well.

    Returns
    -------
    The configured Altair chart (650x300, bars sorted by descending count).
    """
    data_to_plot, fieldname_v2 = get_count_df(input_df, fieldname, str_to_append)
    feat_count = float(data_to_plot['count'].sum())

    def compute_formatted_percent(featureval):
        # Share of all counted features, e.g. "12.34%".
        return str(round((featureval / feat_count) * 100, 2)) + "%"

    data_to_plot['percent'] = data_to_plot['count'].apply(compute_formatted_percent)

    # The ``tooltip`` argument used to be ignored (the encoding was hard-coded
    # to 'percent'); honour it while still keeping the percentage visible.
    tooltip_fields = [tooltip] if isinstance(tooltip, str) else list(tooltip)
    if 'percent' not in tooltip_fields:
        tooltip_fields.append('percent')

    return alt.Chart(data_to_plot, title=title).mark_bar().encode(
        x=alt.X(fieldname_v2, sort='-y'),
        y='count',
        tooltip=tooltip_fields,
    ).properties(
        width=650,
        height=300).configure_title(fontSize=title_fontsize).interactive()
def create_barchartV2(input_gdf, fieldname, title, str_to_append=' type', title_fontsize=24, len_field='length(km)'):
    """Draw an interactive bar chart of summed length per category.

    For every distinct value of ``fieldname`` the chart shows the sum of
    ``len_field`` as bar height (also used as tooltip) and the number of
    features in that category as bar colour.

    Parameters
    ----------
    input_gdf : DataFrame-like
        Table containing both ``fieldname`` and ``len_field``.
    fieldname : str
        Column that defines the categories.
    title : str
        Chart title.
    str_to_append : str
        Suffix appended to ``fieldname`` to label the x axis.
    title_fontsize : int
        Font size applied to the chart title.
    len_field : str
        Column that is summed for each category.

    Returns
    -------
    The configured Altair chart (650x300, bars sorted by descending height).
    """
    fieldname_v2 = fieldname + str_to_append

    # One row per category: how many features it has and their total length.
    per_category = (
        input_gdf[[len_field, fieldname]]
        .groupby([fieldname])
        .agg({fieldname: 'count', len_field: 'sum'})
    )
    # The count column must be renamed before the index (which carries the
    # same name) is turned back into a column.
    data_to_plot = (
        per_category
        .rename(columns={fieldname: 'feature count'})
        .reset_index()
        .rename(columns={fieldname: fieldname_v2})
    )

    bars = alt.Chart(data_to_plot, title=title).mark_bar().encode(
        x=alt.X(fieldname_v2, sort='-y'),
        y=len_field,
        tooltip=len_field,
        color='feature count',
    )
    sized = bars.properties(width=650, height=300)
    return sized.configure_title(fontSize=title_fontsize).interactive()
def print_relevant_columnames(input_df, not_include=('score', 'geometry', 'type', 'id')):
    """Print the column names of ``input_df`` that are worth inspecting.

    A column is left out when its name equals one of the ``not_include``
    entries or contains one of them as a whole word (words are the
    alphanumeric runs of the name, so ``surface_score`` is hidden by
    ``'score'``). Matching whole words instead of raw substrings keeps
    unrelated columns such as ``width`` or ``sidewalk`` (which merely contain
    the letters "id") in the listing.

    Parameters
    ----------
    input_df : DataFrame-like
        Object exposing a ``columns`` iterable.
    not_include : iterable of str
        Names / words that mark a column as not relevant.

    Returns
    -------
    None. The remaining names are printed on one line, each followed by
    ``', '``.
    """
    excluded = set(not_include)

    def is_relevant(column):
        name = str(column)
        if name in excluded:
            return False
        # Split the name into alphanumeric words: "kerb:height" -> kerb, height.
        words = ''.join(ch if ch.isalnum() else ' ' for ch in name).split()
        return excluded.isdisjoint(words)

    print(*[f'{column}, ' for column in input_df.columns if is_relevant(column)])
def return_weblink(string_id, type='way'):
    """Return an HTML anchor pointing at an OpenStreetMap element page.

    Parameters
    ----------
    string_id
        Identifier of the element; used both in the URL and as link text.
    type : str
        Path segment placed before the identifier in the URL.

    Returns
    -------
    str
        ``<a href=...>`` markup linking to the element.
    """
    osm_url = "https://www.openstreetmap.org/{}/{}".format(type, string_id)
    return "<a href={}>{}</a>".format(osm_url, string_id)
def get_year_surveydate(featuredate):
    """Extract the year part of a survey date.

    Parameters
    ----------
    featuredate
        Usually a string whose first ``'-'``-separated part is the year
        (text without a ``'-'`` is returned unchanged). Date-like objects
        with an integer ``year`` attribute are accepted too.

    Returns
    -------
    str
        The year as text, or ``'?'`` when no year can be derived (missing
        values such as ``None`` or NaN) instead of raising.
    """
    if isinstance(featuredate, str):
        return featuredate.split('-')[0]
    # Date-like objects expose the year directly; missing values do not
    # provide an integer year and fall through to the placeholder.
    year = getattr(featuredate, 'year', None)
    if isinstance(year, int):
        return str(year)
    return '?'
# Load the sidewalks layer and add the derived columns used by the charts.
sidewalks_gdf = gpd.read_file('../data/sidewalks.geojson')
# compute lengths only once:
# (reprojected before measuring; presumably EPSG:31982 is a metric CRS so that
# /1000 yields kilometres - TODO confirm)
sidewalks_gdf['length(km)'] = sidewalks_gdf.to_crs('EPSG:31982').length/1000
sidewalks_gdf['weblink'] = sidewalks_gdf['id'].astype('string').apply(return_weblink)
sidewalks_gdf['Year of Survey'] = sidewalks_gdf['survey:date'].apply(get_year_surveydate)
# Build the plain-DataFrame view only after the derived columns exist: the
# bar charts below read 'length(km)' from `sidewalks_data`, and a frame
# constructed earlier is not guaranteed to pick up columns added afterwards.
sidewalks_data = pd.DataFrame(sidewalks_gdf)
array(['?', '2019'], dtype=object)
Printing the relevant columns of the data:
# List the attribute columns of the sidewalks layer; the helper's default
# `not_include` words decide which column names are left out.
print_relevant_columnames(sidewalks_gdf)
bicycle, footway, highway, name, foot, lcn, motor_vehicle, segregated, access, horse, oneway, maxspeed, layer, lit, surface, tunnel, incline, smoothness, opening_hours, cutting, embankment, dog, wheelchair, level, cycleway, cycleway:right, ramp, noname, crossing, alt_name, source, handrail, ramp:wheelchair, step_count, tactile_paving, kerb, traffic_signals, survey:date, mapillary, description, paving_stones, barrier, incline:across, length(km), weblink,
# One bar chart per sidewalk attribute: bar height is the summed length
# (default `len_field` 'length(km)'), bar colour the number of features.
# NOTE(review): these calls read from `sidewalks_data`, so that frame must
# already contain the 'length(km)' column.
create_barchartV2(sidewalks_data,'surface','Sidewalks Surface Type',title_fontsize=24)
create_barchartV2(sidewalks_data,'smoothness','Sidewalks Smoothness Level',title_fontsize=24)
create_barchartV2(sidewalks_data,'tactile_paving','Sidewalks Tactile Paving Presence',title_fontsize=24)
create_barchartV2(sidewalks_data,'width','Sidewalks Width Values',title_fontsize=24)
create_barchartV2(sidewalks_data,'incline','Sidewalks Incline Values',title_fontsize=24)
def double_scatter_bar(input_df,title,xs='surface',ys='smoothness',scolor=None,xh='count()',yh1='surface',yh2='smoothness',hcolor=None,fontsize=24):
    """Build a scatter plot linked to two bar charts by an interval selection.

    The scatter plot (``xs`` against ``ys``) carries an interval selection;
    the two bar charts placed below it are filtered by that selection.

    Parameters
    ----------
    input_df
        Data charted by the scatter plot and by both bar charts. The tooltips
        encode the fields ``'type'`` and ``'id'``.
    title : str
        Title of the scatter plot.
    xs, ys : str
        Encodings for the scatter plot's x and y channels.
    scolor
        Colour encoding of the scatter points; falls back to a fixed
        'lightseagreen' when falsy.
    xh : str
        Encoding for the x channel of both bar charts.
    yh1, yh2 : str
        Encodings for the y channel of the left and right bar chart.
    hcolor
        Colour encoding of the bars; falls back to a fixed 'lightseagreen'
        when falsy.
    fontsize : int
        Font size of the title.

    Returns
    -------
    The combined chart: scatter plot on top, the two bar charts side by side
    underneath, with a centred title.
    """
    interval = alt.selection_interval()
    default_color = alt.value('lightseagreen')
    if not hcolor:
        hcolor = default_color
    if not scolor:
        scolor = default_color

    scatter = alt.Chart(input_df,title=title).mark_point().encode(
        x=xs,
        y=ys,
        color=scolor,
        tooltip=alt.Tooltip(['type','id']),
    ).add_selection(interval)

    # The bars must summarise the same data as the scatter plot. This used to
    # read the module-level `sidewalks_gdf`, so charts requested for other
    # layers (crossings, kerbs) showed sidewalk data underneath.
    hist_base = alt.Chart(input_df).mark_bar().encode(
        x=xh,
        color=hcolor,
        tooltip=alt.Tooltip(['type','id']),
    ).properties(
        width=300,
        height=220,
    ).transform_filter(
        interval,
    )

    # Same base chart twice, differing only in the y encoding.
    hist = hist_base.encode(y=yh1) | hist_base.encode(y=yh2)
    return (scatter & hist).configure_title(fontSize=fontsize,align='center')
# 'Surface x Smoothness'
# Linked scatter/bar view of sidewalk surface against smoothness (the
# function's default fields); the bars are coloured by 'length(km)'.
double_scatter_bar(sidewalks_gdf,'Surface x Smoothness (sidewalks)',hcolor='length(km)')
# Number of sidewalk features per value of the 'Year of Survey' column.
create_barchart(sidewalks_gdf,'Year of Survey','Year of Survey Image (sidewalks)')
# Load the crossings layer and add the derived columns used by the charts.
crossings_gdf = gpd.read_file('../data/crossings.geojson')
# compute lengths only once:
# (reprojected before measuring; presumably EPSG:31982 is a metric CRS so that
# /1000 yields kilometres - TODO confirm)
crossings_gdf['length(km)'] = crossings_gdf.to_crs('EPSG:31982').length/1000
crossings_gdf['weblink'] = crossings_gdf['id'].astype('string').apply(return_weblink)
crossings_gdf['Year of Survey'] = crossings_gdf['survey:date'].apply(get_year_surveydate)
# Build the plain-DataFrame view last so that it includes the derived
# columns; a frame constructed earlier is not guaranteed to pick up columns
# added afterwards.
crossings_data = pd.DataFrame(crossings_gdf)
# List the attribute columns of the crossings layer.
print_relevant_columnames(crossings_gdf)
crossing, footway, highway, kerb, surface, tactile_paving, traffic_calming, bicycle, name, foot, lit, segregated, layer, access, alt_name, horse, note, motor_vehicle, incline, lcn, crossing:island, lanes, oneway, level, cycleway, cycleway:right, smoothness, wheelchair, mapillary, survey:date, length(km), weblink,
# Feature counts per crossing type and per crossing surface.
create_barchart(crossings_gdf,'crossing','Crossing Type')
create_barchart(crossings_gdf,'surface','Crossing Surface')
# Linked scatter/bar view of surface against smoothness (the function's
# default fields), with points and bars coloured by the 'crossing' column.
double_scatter_bar(crossings_gdf,'Surface x Smoothness (crossings)',hcolor='crossing',scolor='crossing')
# Number of crossing features per value of the 'Year of Survey' column.
create_barchart(crossings_gdf,'Year of Survey','Year of Survey Image (crossings)')
# Load the kerbs layer and add the derived survey-year column.
kerbs_gdf = gpd.read_file('../data/kerbs.geojson')
kerbs_gdf['Year of Survey'] = kerbs_gdf['survey:date'].apply(get_year_surveydate)
# Build the plain-DataFrame view last so that it includes the derived
# column; a frame constructed earlier is not guaranteed to pick up columns
# added afterwards.
kerbs_data = pd.DataFrame(kerbs_gdf)
# List the attribute columns of the kerbs layer.
print_relevant_columnames(kerbs_gdf)
crossing, crossing_ref, highway, kerb, tactile_paving, traffic_calming, bicycle, mapillary, survey:date, wheelchair, button_operated, traffic_signals:sound, traffic_signals:vibration, crossing:island, image, barrier, surface, kerb:height, traffic_signals, smoothness, description,
# Feature counts per value of each kerb attribute.
create_barchart(kerbs_gdf,'kerb','Kerb Type')
create_barchart(kerbs_gdf,'tactile_paving','Kerb Tactile Paving Presence')
# Title spelling corrected ("Acessibility" -> "Accessibility").
create_barchart(kerbs_gdf,'wheelchair','Kerb Wheelchair Accessibility')
create_barchart(kerbs_gdf,'kerb:height','Kerb Height')
create_barchart(kerbs_gdf,'Year of Survey','Year of Survey Image (kerbs)')
# Linked scatter/bar view of kerb type against tactile paving, with points
# and bars coloured by the 'wheelchair' column.
# NOTE(review): the title is a single space - confirm whether a real title
# was intended.
double_scatter_bar(kerbs_gdf,' ',xs='kerb',ys='tactile_paving',yh1='kerb',yh2='tactile_paving',xh='count()',hcolor='wheelchair',scolor='wheelchair')
# Notebook shell escape: convert statistics.ipynb to an HTML page with nbconvert.
!jupyter nbconvert --to html statistics.ipynb
[NbConvertApp] Converting notebook statistics.ipynb to html [NbConvertApp] Writing 7578361 bytes to statistics.html